function E = ThetaEEstimator(ProductionDataNonCum,Display)
% %[tau0,tau1,phi,ThetaE,studlistE,QE]
if nargin==1
    Display = 0 ;
end
studlist     = unique(ProductionDataNonCum.sid) ;
studlisttemp = [] ; %%%%This will be the list of student ids for people who completed >=2 tasks
marginallist = [] ; %%%%This will be the list of student ids for people who completed 1 task but not 2
for ii=1:length(studlist)
    if max(ProductionDataNonCum.k(ProductionDataNonCum.sid==studlist(ii)))==1
        marginallist = [marginallist; studlist(ii)] ; %#ok
    else
        studlisttemp = [studlisttemp; studlist(ii)] ; %#ok
    end
end
id       = ProductionDataNonCum.sid ;
tau      = ProductionDataNonCum.t ;
q        = ProductionDataNonCum.k ;
BurninModel = 1 ;  %%%%When this takes a value of 1, that means we add an extra term to the time equation for a first-unit fixed effect.
if BurninModel==1
    burnin   = 1*(q==1) ;
end
E.BurninModel = BurninModel ;


logtaudiff = [] ;
logqdiff   = [] ;
if BurninModel==1  %%%%This is an alternate specification where we allow for a fixed effect on the first 1 or 2 units of output, while the student figures out how to use the website...
    burnindiff = [] ;
end
for ii=1:length(studlist)
    indx = find(id==studlist(ii)) ;
    if indx>=2
        tautemp    = tau(indx) ;
        qtemp      = q(indx) ;
        logtaudiff = [logtaudiff; log(tautemp(2:end)./tautemp(1:end-1))] ; %#ok
        logqdiff = [logqdiff; log(qtemp(2:end)./qtemp(1:end-1))] ; %#ok
        if BurninModel==1
            burnintemp = burnin(indx) ;
            burnindiff = [burnindiff; (burnintemp(2:end)-burnintemp(1:end-1))] ; %#ok
        end
    end
end


if BurninModel==1
    X = [burnindiff logqdiff] ;
    Y = logtaudiff ;
    tempparams = (X'*X)\X'*Y ;
    E.tau1 = exp(tempparams(1)) ;
    E.phi = -1*tempparams(2) ;
    Z = log(tau) - tempparams(1)*burnin + E.phi*log(q) ;
    E.tau0 = exp( mean(Z) ) ;  %%%%This is the baseline mean time to first success
else
    E.phi = -1*( mean( logtaudiff./logqdiff ) ) ;  %%%%This is the learning parameter
    Z = log(tau) + E.phi*log(q) ;
    E.tau0 = exp( mean(Z) ) ;  %%%%This is the baseline mean time to first success
end
%%
QE        = nan(size(studlist)) ;  %%%%This is the number of units of total output
T         = nan(size(studlist)) ;  %%%%This is the total time spent
ThetaE    = nan(size(studlist)) ;  %%%%This is the unobserved learning efficiency characteristic
FirstTime = nan(size(studlist)) ;  %%%%This is work time on the first unit of output.
U      = [] ;
ThetaEwithU = [] ;
for ii=1:length(studlist)
    indx          = find(id==studlist(ii)) ;
    QE(ii)        = length(indx) ;
    T(ii)         = sum(tau(indx)) ;
    FirstTime(ii) = tau(find(id==studlist(ii) & q==1,1,'first')) ;  %%%%We use the first command here because sometimes this function will be used for bootstrapping, in which case we may have multiple occurrances of the same student id.
    ThetaE(ii)    = exp( sum( Z(indx) - log(E.tau0) )/QE(ii) ) ;
    if BurninModel==1
        U = [U; tau(indx)./( ThetaE(ii)*E.tau0.*(E.tau1.^burnin(indx)).*(q(indx).^(-E.phi)) )] ; %#ok
    else
        U = [U; tau(indx)./( ThetaE(ii)*E.tau0.*(q(indx).^(-E.phi)) )] ; %#ok
    end
    ThetaEwithU   = [ThetaEwithU; ThetaE(ii)*ones(length(indx),1)] ; %#ok
end


%%%%%In this next block of code, we make addjustments for extreme values of ThetaE based on small within-student samples of
%%%%%Q.  If Q>1, we re-set those values to the most extreme values (big or small) observed within the set of students who
%%%%%completed at least MinQ learning tasks.  If Q==1 we throw out the related values of ThetaE and U.
minQ       = 5 ;
Ebounds    = [min(ThetaE(QE>=minQ)) max(ThetaE(QE>=minQ))] ;
adjustlist = studlist( ThetaE<Ebounds(1) | ThetaE>Ebounds(2) ) ; 

for ii=1:length(adjustlist)
    if ThetaE(studlist==adjustlist(ii)) < Ebounds(1) && QE(studlist==adjustlist(ii))>1
        ThetaE(studlist==adjustlist(ii)) = Ebounds(1) ;
    elseif ThetaE(studlist==adjustlist(ii)) > Ebounds(2) && QE(studlist==adjustlist(ii))>1
        ThetaE(studlist==adjustlist(ii)) = Ebounds(2) ;
    else %%%%In this case it must be that Q==1 and the ThetaE value was very small or very big...

        ThetaEtemp = ThetaE(studlist==adjustlist(ii)) ;
        U(ThetaEwithU==ThetaEtemp) = [] ;
        ThetaEwithU(ThetaEwithU==ThetaEtemp) = [] ;
        ThetaE(studlist==adjustlist(ii)) = nan ;
        marginallist(marginallist==adjustlist(ii)) = [] ;
    end

end




%
E.U            = U ;
E.ThetaEwithU  = ThetaEwithU ;

E.ThetaE       = ThetaE ;
E.FirstTime    = FirstTime ;
E.studlistFULL = studlist ;
E.studlist     = studlisttemp ;
E.marginallist = marginallist ;


if Display==1



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%         FIGURE 3, RIGHT PANEL:            %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    figure
    [F_e,X_e] = ecdf(E.ThetaE*E.tau0) ;
    stairs(X_e,F_e,'LineWidth',4)
    xlim([1,30])
    a = get(gca,'XTickLabel') ;
    set(gca,'XTickLabel',a,'fontsize',16)
    legend(strcat('Mean:',num2str(round(mean(E.ThetaE*E.tau0,'omitnan'),1)),', Median:',num2str(round(median(E.ThetaE*E.tau0,'omitnan'),1)),', StDev:',num2str(round(std(E.ThetaE*E.tau0,'omitnan'),1))),'Location','SouthEast','FontSize',24,'FontWeight','bold')
    xlabel('MEAN TASK COMPLETION TIME, \theta_{pi}\times\tau_0, FOR ACTIVE STUDENTS (minutes)','FontSize',24,'FontWeight','bold')
    ylabel('EMPIRICAL CDF','FontSize',24,'FontWeight','bold')
    box on
    grid on
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%     TIME SHOCK DISTRIBUTION ESTIMATION    %%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

Ugridpoints = 500 ;


%%
%%%%-----------------------------------------------------------------------
%%%%-----------------------------------------------------------------------
%%%%OPTION 2: FIT A SET OF FLEXIBLE B-SPLINE CDFS TO THE CONDITIONAL SHOCK
%%%%DISTRIBUTIONS, WHERE CONDITIONING IS ON SEGMENTS OF THE DISTRIBUTION OF
%%%%THETA_E.  I.E., IF "segments=5" BELOW, WE ESTIMATE A SEPARATE SHOCK
%%%%B-SPLINE CDF FOR EACH QUINTILE OF THE THETA_E DISTRIBUTION.
%%%%IMPORTANT NOTE: the ThetaLEstimatorMPEC.m code is designed to be compatible with the ouputs
%%%%from OPTION 2, but not OPTIONS 1 or 3...
%%%%-----------------------------------------------------------------------
%%%%-----------------------------------------------------------------------
%%%%Here we estimate the distribution of production time shocks.  In order
%%%%to make tail behavior more manageable, we estimate the distribution in
%%%%log units, and then transform back into the original units.
K_u = 4 ;
q   = (0:K_u)'/K_u ;
outliercutoff = 4 ; %%%%This is how many standard deviations away from the median will be used to define an outlier.

ThetaEsamp       = E.ThetaE ;
[F0,X0]          = ecdf(ThetaEsamp) ;
segments         = 5 ;  %%%%This is how many segments of different ThetaE values we partition the domain into for heteroskedasticity purposes
cutoffs          = linspace(0,1,segments+1)' ;
cutoffs(1)       = X0(1)*0.99 ;
cutoffs(end)     = X0(end)*1.01 ;
cutoffs(2:end-1) = interp1(F0,X0,cutoffs(2:end-1),'linear') ;

%%%%  The following will hold shock distribution estimates specific to
%%%%  ThetaE within each bin defined by the partition "cutoffs"
UdomHet       = nan(Ugridpoints,segments) ;  %%%%The domain grid for shocks
F_usmoothHet  = nan(Ugridpoints,segments) ;  %%%%CDF of shocks
f_usmoothHet  = nan(Ugridpoints,segments) ;  %%%%PDF of shocks
NHet          = nan(segments,3) ;

rng(2000000) ;
tempsamp = rand(2000000,1) ;
for ii=1:segments
    epsilonsamp2 = log( E.U(E.ThetaEwithU>=cutoffs(ii) & E.ThetaEwithU<=cutoffs(ii+1)) ) ;
    NHet(ii,1)   = length(epsilonsamp2) ;
    stopflag     = 0 ;
    while stopflag==0
        test = sum(abs(epsilonsamp2-median(epsilonsamp2))>outliercutoff*std(epsilonsamp2)) ;
        if test>0
            cutpoint     = outliercutoff*std(epsilonsamp2) ;
            epsilonsamp2 = epsilonsamp2(abs(epsilonsamp2-median(epsilonsamp2))<=cutpoint) ;
        else
            stopflag = 1 ;
        end
    end
    NHet(ii,2) = length(epsilonsamp2) ;
    NHet(ii,3) = std(epsilonsamp2) ;


    [F,X,FLO,FUP] = ecdf(epsilonsamp2) ;
    knotvec_u     = [min(epsilonsamp2); interp1(F(2:end),X(2:end),q(2:end-1),'linear'); max(epsilonsamp2)] ;
    %%%%Even after the log transformation, we still have long tails, so we
    %%%%strategically place extra knots near the endpoints for added
    %%%%flexibility:
    knotvec_u             = sort([knotvec_u; sum(knotvec_u(1:2))/2; sum(knotvec_u(end-1:end))/2]) ;
    knotvec_u             = sort([knotvec_u; sum(knotvec_u(2:3))/2; sum(knotvec_u(end-2:end-1))/2]) ;
    K_u                   = length(knotvec_u)-1 ; %#ok %%%%After the above 2 lines, K_u is now 6 rather than 4...
    beta_u                = BsplineCDFLsqFit(epsilonsamp2,knotvec_u,unique(epsilonsamp2)) ;
    Epsilondom            = interp1(F([1;(3:length(F))']),X([1;(3:length(F))']),linspace(0,1,Ugridpoints)','linear') ;
    [F_usmooth,f_usmooth] = BsplineEval3(knotvec_u,beta_u,Epsilondom) ;
    %%%%Now we transform back to the original units of the schock:
    X         = exp(X) ;
    Udom      = exp(Epsilondom) ;
    f_usmooth = f_usmooth./Udom ;
    if Display==1
        %%%%Double check everything went well by integrating f_usmooth:
        h         = Udom(2:end)-Udom(1:end-1);
% % % % %         IntCheck  = sum((h/2).*(f_usmooth(2:end)+f_usmooth(1:end-1))) %#ok




        figure(5)
        hold on
        if ii==1
            plot(Udom,f_usmooth,'LineWidth',4)
        elseif ii==2
            plot(Udom,f_usmooth,'--','LineWidth',4)
        elseif ii==3
            plot(Udom,f_usmooth,'-.','LineWidth',4)
        elseif ii==4
            plot(Udom,f_usmooth,':','LineWidth',4)
        else
            plot(Udom,f_usmooth,'--','LineWidth',4)
        end
        xlim([0,6])
    end

    UdomHet(:,ii)       = Udom ;
    F_usmoothHet(:,ii)  = F_usmooth ;
    f_usmoothHet(:,ii)  = f_usmooth ;
end
if Display==1

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%              FIGURE OS.2:                 %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    figure(5)
    hold on
    box on
    grid on
    xlim([0,3])
    a = get(gca,'XTickLabel') ;
    set(gca,'XTickLabel',a,'fontsize',16)
    xlabel('Work-Time Shocks, U_a','FontSize',30,'FontWeight','bold')
    ylabel('Conditional PDFs, Given \theta_p \in I^p_j','FontSize',30,'FontWeight','bold')
    if segments==2
        legend('Lower Half','Upper Half')
    elseif segments==3
        legend('Lower Third','Middle Third','Upper Third')
    elseif segments==4
        legend('Lower Quartile','Lower Middle Quartile','Upper Middle Quartile','Upper Quartile')
    elseif segments==5
        legend('Lower Quintile','Second Quintile','Middle Quintile','Third Quintile','Upper Quintile','FontSize',20,'FontWeight','bold')
    elseif segments>5
        cmd = 'legend(''segment 1''' ;
        for ii=2:segments
            cmd = strcat(cmd,',''segment ',num2str(ii),'''') ;
        end
        cmd = strcat(cmd,');') ;
        eval(cmd)
    end
% % % % %     disp(NHet)




    %%%%THIS LOOP WILL PLOT A FIGURE TO COMPARE THE GOODNESS OF FIT OF THE
    %%%%CONDITIONAL SHOCK DISTRIBUTIONS TO THE UNCONDITIONAL EMPIRICAL CDF OF
    %%%%SHOCKS:
    SAMP = [] ;
    for ii=1:segments
        tempThetaE = unique(ThetaEsamp(ThetaEsamp>=cutoffs(ii) & ThetaEsamp<=cutoffs(ii+1))) ;
        for jj=1:length(tempThetaE)
            N = sum(E.ThetaEwithU==tempThetaE(jj));
            Usimsamp = rand(N*100,1) ;
            Ftemp = round(F_usmoothHet(:,ii),6) ;
            dropflag = find(Ftemp(2:end)==Ftemp(1:end-1))  ;
            Ftemp(dropflag) = [] ;
            Utemp = round(UdomHet(:,ii),6) ;
            Utemp(dropflag) = [] ;
            Usimsamp = interp1(Ftemp,Utemp,Usimsamp,'linear') ;
            SAMP = [SAMP; Usimsamp] ; %#ok
        end
    end
end
E.UdomHet                       = UdomHet ;
E.F_usmoothHet                  = F_usmoothHet ;
E.cutoffs                       = cutoffs ;
ThetaEActive                    = nan(size(E.studlist)) ;
ThetaEMarginal                  = nan(size(E.marginallist)) ;
for ii=1:length(E.studlist)
    ThetaEActive(ii) = E.ThetaE(E.studlistFULL==E.studlist(ii)) ;
end
for ii=1:length(E.marginallist)
    ThetaEMarginal(ii) = E.ThetaE(E.studlistFULL==E.marginallist(ii)) ;
end
E.ShockDistMassMarginal         = zeros(length(E.cutoffs)-1,1);
E.ThetaEActive                  = ThetaEActive ;
E.ThetaEMarginal                = ThetaEMarginal ;
for ii=1:length(E.ShockDistMassMarginal)
    E.ShockDistMassMarginal(ii) = mean(ThetaEMarginal<E.cutoffs(ii+1) & ThetaEMarginal>=E.cutoffs(ii));
end










end